{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AdventureWorks: resit questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n"
     ]
    }
   ],
   "source": [
    "import findspark\n",
    "import pandas as pd\n",
    "findspark.init()\n",
    "\n",
    "SVR = '192.168.31.31'\n",
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql.functions import *\n",
    "from pyspark.sql import Window\n",
    "\n",
    "sc = (SparkSession.builder.appName('app14-4') \n",
    "      .master(f'spark://{SVR}:7077') \n",
    "      .config('spark.sql.warehouse.dir', f'hdfs://{SVR}:9000/user/hive/warehouse') \n",
    "      .config('spark.cores.max', '4') \n",
    "      .config('spark.executor.instances', '1') \n",
    "      .config('spark.executor.cores', '2') \n",
    "      .config('spark.executor.memory', '10g') \n",
    "      .enableHiveSupport().getOrCreate())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "cust_aw = sc.read.table('sqlzoo.CustomerAW')\n",
    "cust_addr = sc.read.table('sqlzoo.CustomerAddress')\n",
    "addr = sc.read.table('sqlzoo.Address')\n",
    "product = sc.read.table('sqlzoo.Product')\n",
    "order_det = sc.read.table('sqlzoo.SalesOrderDetail')\n",
    "order_head = sc.read.table('sqlzoo.SalesOrderHeader')\n",
    "prod_model = sc.read.table('sqlzoo.ProductModel')\n",
    "prod_model_prod = sc.read.table('sqlzoo.ProductModelProductDescription')\n",
    "prod_desc = sc.read.table('sqlzoo.ProductDescription')\n",
    "prod_cat = sc.read.table('sqlzoo.ProductCategory')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.\n",
    "**List the SalesOrderNumber for the customer 'Good Toys' 'Bike World'**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SalessOrderNumber</th>\n",
       "      <th>CompanyName</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>None</td>\n",
       "      <td>Bike World</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SO71774</td>\n",
       "      <td>Good Toys</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  SalessOrderNumber CompanyName\n",
       "0              None  Bike World\n",
       "1           SO71774   Good Toys"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(order_head\n",
    " .join(cust_aw\n",
    "       .filter(cust_aw['CompanyName'].isin(['Good Toys', 'Bike World'])),\n",
    "       on='CustomerID', how='right')\n",
    " .select('SalessOrderNumber', 'CompanyName')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.\n",
    "**List the ProductName and the quantity of what was ordered by 'Futuristic Bikes'**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>OrderQty</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ML Mountain Seat/Saddle</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Long-Sleeve Logo Jersey, L</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Classic Vest, S</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         Name  OrderQty\n",
       "0     ML Mountain Seat/Saddle         2\n",
       "1  Long-Sleeve Logo Jersey, L         2\n",
       "2             Classic Vest, S         3"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(product.join(order_det, on='ProductID', how='right')\n",
    " .join(order_head, on='SalesOrderID', how='right')\n",
    " .join(cust_aw.filter(cust_aw['CompanyName']=='Futuristic Bikes'), \n",
    "        on='CustomerID', how='right')\n",
    " .select('Name', 'OrderQty')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.\n",
    "**List the name and addresses of companies containing the word 'Bike' (upper or lower case) and companies containing 'cycle' (upper or lower case). Ensure that the 'bike's are listed before the 'cycles's.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CompanyName</th>\n",
       "      <th>AddressLine1</th>\n",
       "      <th>AddressLine2</th>\n",
       "      <th>City</th>\n",
       "      <th>StateProvince</th>\n",
       "      <th>CountryRegion</th>\n",
       "      <th>PostalCode</th>\n",
       "      <th>flag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A Bike Store</td>\n",
       "      <td>2251 Elliot Avenue</td>\n",
       "      <td>None</td>\n",
       "      <td>Seattle</td>\n",
       "      <td>Washington</td>\n",
       "      <td>United States</td>\n",
       "      <td>98104</td>\n",
       "      <td>bike</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A Typical Bike Shop</td>\n",
       "      <td>One Dancing, Rr</td>\n",
       "      <td>No. 25 Box 8033</td>\n",
       "      <td>Round Rock</td>\n",
       "      <td>Texas</td>\n",
       "      <td>United States</td>\n",
       "      <td>78664</td>\n",
       "      <td>bike</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Advanced Bike Components</td>\n",
       "      <td>12345 Sterling Avenue</td>\n",
       "      <td>None</td>\n",
       "      <td>Irving</td>\n",
       "      <td>Texas</td>\n",
       "      <td>United States</td>\n",
       "      <td>75061</td>\n",
       "      <td>bike</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Area Bike Accessories</td>\n",
       "      <td>6900 Sisk Road</td>\n",
       "      <td>None</td>\n",
       "      <td>Modesto</td>\n",
       "      <td>California</td>\n",
       "      <td>United States</td>\n",
       "      <td>95354</td>\n",
       "      <td>bike</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Associated Bikes</td>\n",
       "      <td>5420 West 22500 South</td>\n",
       "      <td>None</td>\n",
       "      <td>Salt Lake City</td>\n",
       "      <td>Utah</td>\n",
       "      <td>United States</td>\n",
       "      <td>84101</td>\n",
       "      <td>bike</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>187</th>\n",
       "      <td>Unicycles, Bicycles, and Tricycles</td>\n",
       "      <td>Stonewood Mall</td>\n",
       "      <td>None</td>\n",
       "      <td>Downey</td>\n",
       "      <td>California</td>\n",
       "      <td>United States</td>\n",
       "      <td>90241</td>\n",
       "      <td>cycle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>Valley Bicycle Distributors</td>\n",
       "      <td>5867 Sunrise Boulevard</td>\n",
       "      <td>None</td>\n",
       "      <td>Citrus Heights</td>\n",
       "      <td>California</td>\n",
       "      <td>United States</td>\n",
       "      <td>95610</td>\n",
       "      <td>cycle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189</th>\n",
       "      <td>Valley Bicycle Specialists</td>\n",
       "      <td>Blue Ridge Mall</td>\n",
       "      <td>None</td>\n",
       "      <td>Kansas City</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>United States</td>\n",
       "      <td>64106</td>\n",
       "      <td>cycle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190</th>\n",
       "      <td>Westside Cycle Store</td>\n",
       "      <td>25550 Executive Dr</td>\n",
       "      <td>None</td>\n",
       "      <td>Elgin</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>United States</td>\n",
       "      <td>60120</td>\n",
       "      <td>cycle</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>Yellow Bicycle Company</td>\n",
       "      <td>St. Louis Marketplace</td>\n",
       "      <td>None</td>\n",
       "      <td>Saint Louis</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>United States</td>\n",
       "      <td>63103</td>\n",
       "      <td>cycle</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>192 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            CompanyName            AddressLine1  \\\n",
       "0                          A Bike Store      2251 Elliot Avenue   \n",
       "1                   A Typical Bike Shop         One Dancing, Rr   \n",
       "2              Advanced Bike Components   12345 Sterling Avenue   \n",
       "3                 Area Bike Accessories          6900 Sisk Road   \n",
       "4                      Associated Bikes   5420 West 22500 South   \n",
       "..                                  ...                     ...   \n",
       "187  Unicycles, Bicycles, and Tricycles          Stonewood Mall   \n",
       "188         Valley Bicycle Distributors  5867 Sunrise Boulevard   \n",
       "189          Valley Bicycle Specialists         Blue Ridge Mall   \n",
       "190                Westside Cycle Store      25550 Executive Dr   \n",
       "191              Yellow Bicycle Company   St. Louis Marketplace   \n",
       "\n",
       "        AddressLine2            City StateProvince  CountryRegion PostalCode  \\\n",
       "0               None         Seattle    Washington  United States      98104   \n",
       "1    No. 25 Box 8033      Round Rock         Texas  United States      78664   \n",
       "2               None          Irving         Texas  United States      75061   \n",
       "3               None         Modesto    California  United States      95354   \n",
       "4               None  Salt Lake City          Utah  United States      84101   \n",
       "..               ...             ...           ...            ...        ...   \n",
       "187             None          Downey    California  United States      90241   \n",
       "188             None  Citrus Heights    California  United States      95610   \n",
       "189             None     Kansas City      Missouri  United States      64106   \n",
       "190             None           Elgin      Illinois  United States      60120   \n",
       "191             None     Saint Louis      Missouri  United States      63103   \n",
       "\n",
       "      flag  \n",
       "0     bike  \n",
       "1     bike  \n",
       "2     bike  \n",
       "3     bike  \n",
       "4     bike  \n",
       "..     ...  \n",
       "187  cycle  \n",
       "188  cycle  \n",
       "189  cycle  \n",
       "190  cycle  \n",
       "191  cycle  \n",
       "\n",
       "[192 rows x 8 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(cust_aw.join(cust_addr, on='CustomerID')\n",
    " .join(addr, on='AddressID')\n",
    " .select('CompanyName', 'AddressLine1', 'AddressLine2', 'City',\n",
    "       'StateProvince', 'CountryRegion', 'PostalCode')\n",
    " .withColumn('flag', when(lower(col('CompanyName')).like('%bike%'), 'bike')\n",
    "             .when(lower(col('CompanyName')).like('%cycle%'), 'cycle'))\n",
    " .dropna(subset=['flag'])\n",
    " .orderBy('flag', 'CompanyName')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.\n",
    "**Show the total order value for each CountryRegion. List by value with the highest first.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CountryRegion</th>\n",
       "      <th>sum(SubTotal)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>518096.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>United States</td>\n",
       "      <td>347336.69</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    CountryRegion  sum(SubTotal)\n",
       "0  United Kingdom      518096.42\n",
       "1   United States      347336.69"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(addr.join(order_head, on=(addr['AddressID']==order_head['ShipToAddressID']))\n",
    " .groupby('CountryRegion')\n",
    " .sum('SubTotal')\n",
    " .orderBy(col('sum(SubTotal)').desc())\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.\n",
    "**Find the best customer in each region.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CountryRegion</th>\n",
       "      <th>CompanyName</th>\n",
       "      <th>sum(SubTotal)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>Action Bicycle Specialists</td>\n",
       "      <td>108561.83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>United States</td>\n",
       "      <td>Eastside Department Store</td>\n",
       "      <td>83858.43</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    CountryRegion                 CompanyName  sum(SubTotal)\n",
       "0  United Kingdom  Action Bicycle Specialists      108561.83\n",
       "1   United States   Eastside Department Store       83858.43"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(addr.join(order_head, on=(addr['AddressID']==order_head['ShipToAddressID']))\n",
    " .join(cust_aw, on='CustomerID')\n",
    " .groupBy('CountryRegion', 'CompanyName')\n",
    " .sum('SubTotal')\n",
    " .withColumn('sn', rank().over(\n",
    "     Window.partitionBy('CountryRegion').orderBy(col('sum(SubTotal)').desc())))\n",
    " .filter(col('sn')==1)\n",
    " .select('CountryRegion', 'CompanyName', 'sum(SubTotal)')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
